#! /usr/local/bin/R
#
# build_reg_data.R
#
#
# 
#
#
# Created on     January  05   2020
# Last modified  December 01   2020
#
#
# ------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------
# Open the run log: a fixed header line, then the wall-clock time at which
# the script started (weekday, month, day, locale time, year).
message("Log file for code executed at\n")
message(strftime(Sys.time(), format = "%a %b %d %X %Y"))
# ------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------
library(crayon)
library(devtools)

library(ggplot2)
library(bit64);
library(magrittr);
library(stringr);
library(glue)
library(haven)
library(data.table);
library(statar)
# ------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------
# APPEND THE SESSION INFO (R VERSION + LOADED PACKAGES) TO THE SESSION LOG
# The log is appended to when it already exists and created otherwise.
dir.create("log", showWarnings = FALSE, recursive = TRUE)  # sink() fails if the folder is missing
check_file <- file.exists("log/R-session-info.log.R")
sink("log/R-session-info.log.R", append = check_file)
# `finally` guarantees that console output is restored even if printing the
# session info fails; otherwise all later output would keep going to the log.
invisible(tryCatch(
  {
    # Header names this script so entries from different scripts can be told apart.
    cat(bold("\n\n# -----\n# Session info for build_reg_data.R\n\n"))
    # Explicit print(): the value is no longer a top-level expression, so it
    # would not be auto-printed while the sink is still active.
    print(session_info())
  },
  finally = sink()
))
# ------------------------------------------------------------------------------------------

# Input variants to process; each one is read from
# ../task_LR/output/LRbetas_daily_trade_{ext}.dta and written to
# ./output/trade_fxdaily_reg_data_{ext}.rds.
dlist <- c("PCA", "PCA_resid")

# Codes used to tag the base / foreign side of a pair as "emerging".
# Loop-invariant, so it is defined once instead of on every iteration.
list_emerging <- c("BRL", "CLP", "CNY", "COP", "CZK", "EGP", "GRD", "HUF", "INR", "IDR", "KRW", "MYR", "MXN", "PEN",
                   "PHP", "PLN", "QAR", "RUB", "ZAR", "THB", "TWD", "TRY", "AED")

# Flag the observations of `x` lying strictly between its `lower` and `upper`
# quantiles. Returns 1 inside the band and 0 outside; NA inputs are flagged 0.
# Note: this only builds a 0/1 indicator, `x` itself is not modified.
flag_within_quantiles <- function(x, lower = 0.01, upper = 0.99) {
  bounds <- unname(quantile(x, probs = c(lower, upper), na.rm = TRUE))
  fifelse(x > bounds[1] & x < bounds[2], 1, 0, na = 0)
}

for (ext in dlist) {

  # ------------------------------------------------------------------------------------------
  # LOAD THE DATA
  dt_event_daily <- read_dta(glue("../task_LR/output/LRbetas_daily_trade_{ext}.dta")) %>% data.table

  # Preview the data in the log. Auto-printing is disabled inside a `for`
  # loop, so a bare `dt_event_daily[]` would silently do nothing.
  print(dt_event_daily)
  # ------------------------------------------------------------------------------------------


  # ------------------------------------------------------------------------------------------
  # Defining log variables and the trade share (gross trade in % of gdp_o)
  dt_event_daily[, `:=`(
    log_trade   = log(trade_gross),
    log_export  = log(export),
    log_import  = log(import),
    trade_share = trade_gross / gdp_o * 100
  )]

  # Defining EIA variables.
  setorder(dt_event_daily, ISOPAIR, year)
  # l_EIA: lag of EIA along `year` within each pair (statar::tlag;
  # presumably the value one year earlier -- confirm against statar docs).
  dt_event_daily[, l_EIA := tlag(EIA, time = year), by = .(ISOPAIR)]
  # Fill forward: every row after 2012 receives the lagged EIA of the pair's
  # 2013 row.
  # NOTE(review): relies on each pair having a 2013 row -- confirm.
  dt_event_daily[year > 2012, EIA := .SD[year == 2013]$l_EIA, by = .(ISOPAIR)]

  # Where EIA_e is available after 2012 it takes precedence over the fill.
  dt_event_daily[year > 2012 & !is.na(EIA_e), EIA := EIA_e]
  # ------------------------------------------------------------------------------------------


  # FILL SOME DUMMIES
  # Each dummy is 1 for the listed EIA codes and 0 otherwise.
  dt_event_daily[, `:=`(
    CUMECU = fifelse(EIA == 4 | EIA == 5 | EIA == 6, 1, 0, na = 0),
    FTA    = fifelse(EIA == 3,                       1, 0, na = 0),
    TWPTA  = fifelse(EIA == 2,                       1, 0, na = 0),
    OWPTA  = fifelse(EIA == 1,                       1, 0, na = 0),
    PTA    = fifelse(EIA == 1 | EIA == 2,            1, 0, na = 0)
  )]

  # Identifying missing observations: EIA is NA or carries the -999 code,
  # in which case all dummies are set to missing rather than 0.
  eia_dummies <- c("CUMECU", "FTA", "TWPTA", "OWPTA", "PTA")
  dt_event_daily[is.na(EIA) | EIA == -999, (eia_dummies) := NA_real_]

  # Identifying emerging & developed countries (1 = code is in list_emerging)
  dt_event_daily[, `:=`(
    emerging_base    = as.numeric(base    %in% list_emerging),
    emerging_foreign = as.numeric(foreign %in% list_emerging)
  )]

  # RESCALE LHS VARIABLE OF VOLATILITY FOR READABILITY
  # Order matters: the annualised series is built from the raw fx_vol,
  # before fx_vol itself is multiplied by 100.
  dt_event_daily[, fx_vol_annual := fx_vol * sqrt(252)]
  dt_event_daily[, fx_vol := fx_vol * 100]
  dt_event_daily[, vol_adj := var_adj]

  # FLAG OBSERVATIONS INSIDE THE 1%-99% BAND
  # These are 0/1 indicators (1 = strictly inside the band); the underlying
  # variables are left untouched, i.e. nothing is actually winsorized here.
  band_flags <- c(
    win_beta       = "beta",
    win_r2         = "R2_y",
    win_vol        = "fx_vol",
    win_vol_annual = "fx_vol_annual",
    win_vol_adj    = "vol_adj"
  )
  dt_event_daily[, (names(band_flags)) := lapply(.SD, flag_within_quantiles),
                 .SDcols = unname(band_flags)]

  # OTHER VARIABLES FOR REGRESSION
  dt_event_daily[, `:=`(
    ISO1      = base,
    ISO2      = foreign,
    datey     = as.factor(year),
    ISO1_year = interaction(base, year, sep = ":"),
    ISO2_year = interaction(foreign, year, sep = ":")
  )]
  # Drop rows without a pair identifier.
  dt_event_daily <- dt_event_daily[!is.na(ISOPAIR) & ISOPAIR != ""]

  # Constructing Richmond (2019) trade centrality measure
  # export_total: sum of `import` within (base, year).
  # NOTE(review): the original comment describes this as the total exports of
  # the FOREIGN country, but the grouping is by `base` -- confirm which
  # grouping is intended.
  dt_event_daily[, export_total := sum(import, na.rm = TRUE), by = .(base, year)]
  # Weights (imports of the base country = exports of the foreign country).
  dt_event_daily[, export_share := import / export_total]

  dt_event_daily[, trade_intensity := trade_gross / (gdp_o + gdp_d)]   # bilateral trade intensity
  dt_event_daily[, cent_measure_j := trade_intensity * export_share]   # pair-level contribution
  dt_event_daily[, cent_measure := sum(cent_measure_j, na.rm = TRUE), by = .(base, year)]  # centrality per base-year

  # One row per (base, year); cent_measure is constant within the group, so
  # the mean just collapses it.
  dt_aux <- dt_event_daily[, .(cent_measure = mean(cent_measure, na.rm = TRUE)), by = .(base, year)]
  dt_aux[cent_measure == 0, cent_measure := NA_real_]  # a zero sum is treated as missing
  dt_aux[, cent_rank := rank(cent_measure, na.last = "keep", ties.method = "min"), by = year]
  dt_aux[, cent_measure := NULL]
  # Pick one year just to check if ranking makes sense
  dt_aux2 <- dt_aux[year == 1990, .(base, cent_rank_1990 = cent_rank)]

  # Check ranking (explicit print(): results are not auto-printed in a loop)
  print(dt_aux2[order(-cent_rank_1990)])

  # Both merges are inner joins. The second one keeps only bases that have a
  # 1990 row, so report any rows lost instead of dropping them silently.
  n_before_merge <- nrow(dt_event_daily)
  dt_event_daily <- merge(dt_event_daily, dt_aux, by = c("base", "year"))
  dt_event_daily <- merge(dt_event_daily, dt_aux2, by = c("base"))
  n_dropped <- n_before_merge - nrow(dt_event_daily)
  if (n_dropped > 0L) {
    message(glue("[{ext}] {n_dropped} rows dropped by the inner joins on the centrality ranks."))
  }
  rm(dt_aux, dt_aux2)

  saveRDS(dt_event_daily, glue("./output/trade_fxdaily_reg_data_{ext}.rds"))

  # ------------------------------------------------------------------------------------------
}
